# Core numerical / dataframe stack and plotting setup.
import numpy as np
import pandas as pd
import cufflinks as cf
cf.go_offline()  # enable offline plotly rendering so DataFrame.iplot works without a plotly account
import sys
import os
from tqdm import tqdm, notebook # progress bar
# user defined functions
sys.path.append(os.getcwd())  # make the local `utility` package importable when run from the project root
from utility.utility import compute_weights, compute_beta, compute_summary
# Load the raw price/volume history and order it by ticker, then date (both ascending).
raw_data = (
    pd.read_csv(r'data.csv')
      .sort_values(by=['ticker', 'date'], ascending=[True, True])
)
# Quick sanity check of the parsed column types and the first few rows.
print(raw_data.dtypes)
raw_data.head()
ticker object date object last float64 volume int64 dtype: object
| ticker | date | last | volume | |
|---|---|---|---|---|
| 0 | 1332 JT | 2013-01-04 | 169.0987 | 1464100 |
| 1 | 1332 JT | 2013-01-07 | 166.3266 | 1783500 |
| 2 | 1332 JT | 2013-01-08 | 166.3266 | 1759800 |
| 3 | 1332 JT | 2013-01-09 | 165.4026 | 767800 |
| 4 | 1332 JT | 2013-01-10 | 167.2507 | 1503100 |
## check whether there is duplicated data
# A (ticker, date) pair should appear at most once in the raw file.
n_dups = raw_data.duplicated(subset=['ticker', 'date'], keep='first').sum()
print('There are {} duplicated data.'.format(n_dups))
There are 0 duplicated data.
## transform data into wide panel format
# Wide matrix of close prices: rows = dates, columns = tickers.
price = raw_data.set_index(['ticker', 'date'])['last'].unstack('ticker')
# Daily log return log(P_t / P_{t-1}); limit=5 caps how many consecutive
# missing prices pct_change fills before the change is computed.
# NOTE(review): this relies on pct_change's default pad fill, which newer
# pandas versions deprecate — confirm behavior on the pandas version in use.
ret = np.log(1+price.pct_change(limit = 5))
# Wide matrix of traded share counts, same shape as `price`.
volume = raw_data.set_index(['ticker', 'date'])['volume'].unstack('ticker')
# 21-day average daily traded value (price * shares), requiring >= 10 observations.
adv = (price * volume).rolling(window = 21, min_periods = 10).mean()
## construct universe: non-null close price and adv > 5M USD (assuming 122 Yen = 1 USD)
universe = price.notnull() & (adv > 5e6 * 122)
del raw_data  # the long-format frame is no longer needed; keep only the wide panels
# beta of the stocks
# Per-ticker market beta from the project helper, restricted to the tradable universe.
beta = compute_beta(ret, universe)
beta.tail()
| ticker | 1332 JT | 1333 JT | 1334 JT | 1605 JT | 1721 JT | 1801 JT | 1802 JT | 1803 JT | 1808 JT | 1812 JT | ... | 9503 JT | 9531 JT | 9532 JT | 9602 JT | 9613 JT | 9681 JT | 9735 JT | 9766 JT | 9983 JT | 9984 JT |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| date | |||||||||||||||||||||
| 2021-03-15 | 1.010226 | 0.771635 | NaN | 1.206411 | 0.685761 | 0.920795 | 1.031198 | 0.991042 | 0.933728 | 0.977613 | ... | 0.705662 | 0.444550 | 0.464190 | 0.615555 | 0.993759 | NaN | 0.606908 | 1.031077 | 0.863996 | 1.008591 |
| 2021-03-16 | 1.008427 | 0.770848 | NaN | 1.206634 | 0.684400 | 0.921100 | 1.031207 | 0.991837 | 0.935217 | 0.977828 | ... | 0.706140 | 0.445712 | 0.465095 | 0.613816 | 0.994271 | NaN | 0.606908 | 1.031340 | 0.863416 | 1.008855 |
| 2021-03-17 | 1.006201 | 0.768698 | NaN | 1.209510 | 0.684969 | 0.920366 | 1.030430 | 0.992077 | 0.938196 | 0.976504 | ... | 0.708009 | 0.446120 | 0.464446 | 0.612198 | 0.998420 | NaN | 0.605787 | 1.032345 | 0.862462 | 1.013885 |
| 2021-03-18 | 1.005117 | 0.767918 | NaN | 1.208405 | 0.686675 | 0.921769 | 1.031637 | 0.992444 | 0.942085 | 0.977225 | ... | 0.708003 | 0.445024 | 0.464360 | 0.613058 | 0.996861 | NaN | 0.605578 | 1.028450 | 0.862796 | 1.008964 |
| 2021-03-19 | 1.004974 | 0.767944 | NaN | 1.207817 | 0.686424 | 0.922450 | 1.033180 | 0.993789 | 0.944866 | 0.977980 | ... | 0.707719 | 0.444282 | 0.463374 | 0.612052 | 0.994768 | NaN | 0.604161 | 1.026329 | 0.858414 | 1.006682 |
5 rows × 248 columns
# Build the feature panel. Columns are collected in a dict and concatenated
# once at the end: inserting 143 columns one by one into a growing DataFrame
# fragments its internal blocks and triggers pandas PerformanceWarnings.
feature_cols = {}
# past daily return: R1..R120 = log return lagged by t trading days
for t in tqdm(range(1, 121), desc='Progress', position=0, leave=True):
    feature_cols['R{}'.format(t)] = ret.shift(t).stack()
# past daily volume: V1..V21 = traded shares lagged by t trading days
for t in tqdm(range(1, 22), desc='Progress', position=0, leave=True):
    feature_cols['V{}'.format(t)] = volume.shift(t).stack()
# past cumulative return: rolling mean of returns over the window (beg, end]
# days back, shifted so the window ends `end` days before the target date
date_range_list = [(-252, -125), (-125, -21)]
for beg, end in tqdm(date_range_list, desc='Progress', position=0, leave=True):
    win_roll = end - beg
    min_roll = win_roll//2  # require at least half the window to be observed
    feature_cols['Cret{}_{}'.format(-beg, -end)] = ret.rolling(window=win_roll, min_periods=min_roll).mean().shift(-end).stack()
# target: same-day log return (all features above are lagged >= 1 day)
feature_cols['y'] = ret.stack()
# Concatenate with explicit keys to keep insertion order, then reindex to the
# rows where R1 is defined — the same row set the original column-by-column
# assignment produced (the first assigned column fixed the index).
df_fset = pd.concat(list(feature_cols.values()), axis=1,
                    keys=list(feature_cols.keys())).reindex(feature_cols['R1'].index)
df_fset
Progress: 100%|██████████████████████████████████████████████████████████████████████| 120/120 [01:10<00:00, 1.71it/s] Progress: 100%|████████████████████████████████████████████████████████████████████████| 21/21 [00:12<00:00, 1.68it/s] Progress: 100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:01<00:00, 1.70it/s]
| R1 | R2 | R3 | R4 | R5 | R6 | R7 | R8 | R9 | R10 | ... | V15 | V16 | V17 | V18 | V19 | V20 | V21 | Cret252_125 | Cret125_21 | y | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| date | ticker | |||||||||||||||||||||
| 2013-01-08 | 1332 JT | -0.016529 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.000000 |
| 1334 JT | -0.019802 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.000000 | |
| 1605 JT | -0.010893 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | -0.004391 | |
| 1721 JT | -0.028094 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.015504 | |
| 1801 JT | 0.003384 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | -0.017036 | |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2021-03-19 | 9613 JT | 0.003396 | 0.009112 | -0.010811 | 0.009096 | 0.001143 | 0.000000 | 0.007461 | 0.013921 | -0.017945 | 0.027925 | ... | 5233200.0 | 3285700.0 | 4675700.0 | 2804000.0 | 2459500.0 | 3040100.0 | 2951900.0 | 0.000043 | 0.002761 | -0.026913 |
| 9735 JT | 0.002064 | -0.001445 | -0.003296 | 0.012519 | 0.020936 | 0.005009 | 0.007077 | 0.003989 | 0.004874 | 0.007738 | ... | 711000.0 | 584300.0 | 863300.0 | 334300.0 | 486900.0 | 424900.0 | 362200.0 | 0.001269 | 0.000008 | -0.010988 | |
| 9766 JT | -0.012579 | 0.000000 | 0.015396 | -0.040088 | 0.020535 | 0.023794 | -0.002829 | 0.022858 | 0.005797 | -0.010123 | ... | 618500.0 | 577100.0 | 793100.0 | 294300.0 | 391600.0 | 458500.0 | 357500.0 | 0.001549 | 0.004907 | -0.037254 | |
| 9983 JT | 0.009641 | 0.008473 | -0.002099 | 0.007260 | -0.001372 | 0.033018 | -0.024440 | -0.002762 | -0.016724 | -0.034447 | ... | 773600.0 | 621000.0 | 749700.0 | 451300.0 | 731800.0 | 838600.0 | 455800.0 | 0.001638 | 0.004076 | -0.062910 | |
| 9984 JT | -0.017459 | -0.020933 | 0.023822 | -0.025233 | 0.032978 | 0.016659 | -0.000988 | 0.031284 | -0.023853 | 0.000497 | ... | 14637700.0 | 10565100.0 | 12776600.0 | 9338300.0 | 8365000.0 | 9582500.0 | 10469800.0 | 0.001644 | 0.005554 | -0.024866 |
450759 rows × 144 columns
# Keep only fully-observed rows: any missing feature or target disqualifies the row.
df_fset = df_fset.dropna()
df_fset
| R1 | R2 | R3 | R4 | R5 | R6 | R7 | R8 | R9 | R10 | ... | V15 | V16 | V17 | V18 | V19 | V20 | V21 | Cret252_125 | Cret125_21 | y | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| date | ticker | |||||||||||||||||||||
| 2013-10-08 | 1332 JT | -0.014963 | -0.009852 | 0.000000 | -0.043172 | 0.009434 | -0.014118 | -0.004662 | 0.028304 | 0.004796 | -0.004796 | ... | 2906200.0 | 634900.0 | 1575800.0 | 1577700.0 | 1030400.0 | 1810200.0 | 1155500.0 | -0.000351 | 0.001351 | 0.000000 |
| 1334 JT | -0.005540 | -0.005509 | 0.000000 | -0.021740 | 0.000000 | -0.010695 | -0.010582 | 0.015916 | -0.010638 | 0.000000 | ... | 3205000.0 | 711000.0 | 908000.0 | 2332000.0 | 1674000.0 | 1633000.0 | 1042000.0 | 0.002570 | 0.000631 | -0.005571 | |
| 1605 JT | -0.025068 | -0.010554 | -0.011309 | -0.000865 | -0.000864 | -0.002587 | 0.006048 | 0.008048 | 0.005432 | -0.014062 | ... | 5908400.0 | 5617600.0 | 5580000.0 | 5056000.0 | 4266000.0 | 4926000.0 | 4836000.0 | 0.001192 | -0.000623 | -0.013693 | |
| 1721 JT | -0.006187 | -0.009973 | -0.018904 | -0.025149 | 0.000731 | -0.017392 | -0.008584 | 0.021599 | -0.010065 | 0.001432 | ... | 2214000.0 | 675500.0 | 764200.0 | 858900.0 | 790600.0 | 679300.0 | 636800.0 | 0.000252 | 0.001276 | 0.001550 | |
| 1801 JT | -0.025808 | -0.010560 | 0.008439 | -0.016807 | -0.006231 | 0.000000 | -0.008247 | 0.038728 | -0.029291 | -0.006167 | ... | 10328600.0 | 24622000.0 | 30421400.0 | 59204400.0 | 25968600.0 | 6966600.0 | 3784200.0 | -0.001492 | 0.004483 | 0.010834 | |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2021-03-19 | 9613 JT | 0.003396 | 0.009112 | -0.010811 | 0.009096 | 0.001143 | 0.000000 | 0.007461 | 0.013921 | -0.017945 | 0.027925 | ... | 5233200.0 | 3285700.0 | 4675700.0 | 2804000.0 | 2459500.0 | 3040100.0 | 2951900.0 | 0.000043 | 0.002761 | -0.026913 |
| 9735 JT | 0.002064 | -0.001445 | -0.003296 | 0.012519 | 0.020936 | 0.005009 | 0.007077 | 0.003989 | 0.004874 | 0.007738 | ... | 711000.0 | 584300.0 | 863300.0 | 334300.0 | 486900.0 | 424900.0 | 362200.0 | 0.001269 | 0.000008 | -0.010988 | |
| 9766 JT | -0.012579 | 0.000000 | 0.015396 | -0.040088 | 0.020535 | 0.023794 | -0.002829 | 0.022858 | 0.005797 | -0.010123 | ... | 618500.0 | 577100.0 | 793100.0 | 294300.0 | 391600.0 | 458500.0 | 357500.0 | 0.001549 | 0.004907 | -0.037254 | |
| 9983 JT | 0.009641 | 0.008473 | -0.002099 | 0.007260 | -0.001372 | 0.033018 | -0.024440 | -0.002762 | -0.016724 | -0.034447 | ... | 773600.0 | 621000.0 | 749700.0 | 451300.0 | 731800.0 | 838600.0 | 455800.0 | 0.001638 | 0.004076 | -0.062910 | |
| 9984 JT | -0.017459 | -0.020933 | 0.023822 | -0.025233 | 0.032978 | 0.016659 | -0.000988 | 0.031284 | -0.023853 | 0.000497 | ... | 14637700.0 | 10565100.0 | 12776600.0 | 9338300.0 | 8365000.0 | 9582500.0 | 10469800.0 | 0.001644 | 0.005554 | -0.024866 |
404780 rows × 144 columns
from sklearn.model_selection import GridSearchCV, StratifiedKFold, train_test_split
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
import statsmodels.api as sm
from sklearn.linear_model import LassoCV
from keras import models, layers
from lightgbm import LGBMRegressor
# Feature columns = everything except the target. Compare against 'y' exactly:
# the original substring test (`'y' not in x`) would silently drop any feature
# whose name happened to contain the letter 'y'.
x_list = [x for x in df_fset.columns if x != 'y']
# Restrict the tradable universe to the out-of-sample period.
universe_test = universe.loc['2015-01':]
universe_test
| ticker | 1332 JT | 1333 JT | 1334 JT | 1605 JT | 1721 JT | 1801 JT | 1802 JT | 1803 JT | 1808 JT | 1812 JT | ... | 9503 JT | 9531 JT | 9532 JT | 9602 JT | 9613 JT | 9681 JT | 9735 JT | 9766 JT | 9983 JT | 9984 JT |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| date | |||||||||||||||||||||
| 2015-01-05 | True | False | False | True | True | True | True | True | False | True | ... | True | True | True | True | True | True | True | True | True | True |
| 2015-01-06 | True | False | False | True | True | True | True | True | False | True | ... | True | True | True | True | True | True | True | True | True | True |
| 2015-01-07 | True | False | False | True | True | True | True | True | False | True | ... | True | True | True | True | True | True | True | True | True | True |
| 2015-01-08 | True | False | False | True | True | True | True | True | False | True | ... | True | True | True | True | True | True | True | True | True | True |
| 2015-01-09 | True | False | False | True | True | True | True | True | False | True | ... | True | True | True | True | True | True | True | True | True | True |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2021-03-15 | True | False | False | True | True | True | True | True | True | True | ... | True | True | True | True | True | False | True | True | True | True |
| 2021-03-16 | True | False | False | True | True | True | True | True | True | True | ... | True | True | True | True | True | False | True | True | True | True |
| 2021-03-17 | True | False | False | True | True | True | True | True | True | True | ... | True | True | True | True | True | False | True | True | True | True |
| 2021-03-18 | True | False | False | True | True | True | True | True | True | True | ... | True | True | True | True | True | False | True | True | True | True |
| 2021-03-19 | True | False | False | True | True | True | True | True | True | True | ... | True | True | True | True | True | False | True | True | True | True |
1516 rows × 248 columns
df_score = pd.DataFrame(columns=universe.columns)  # empty frame collecting monthly OLS scores
# Expanding-window walk-forward: fit on [beg_train, beg_test], score [beg_test, end_test].
# NOTE(review): string .loc slicing is inclusive on both endpoints, so the month
# `beg_test` is present in both the training and the scoring slice — confirm this
# one-month overlap is intentional, as it leaks the scored month into the fit.
beg_train_list = pd.date_range('2013-01', '2019-01', freq='M').strftime('%Y-%m')
beg_test_list = pd.date_range('2015-01', '2021-03', freq='M').strftime('%Y-%m')
end_test_list = pd.date_range('2015-02', '2021-04', freq='M').strftime('%Y-%m')
date_list = list(zip(beg_train_list, beg_test_list, end_test_list))
for beg_train, beg_test, end_test in tqdm(date_list, leave=True):
    X_train_val = df_fset[x_list].loc[beg_train:beg_test, :]
    y_train_val = df_fset['y'].loc[beg_train:beg_test, :]
    X_test = df_fset[x_list].loc[beg_test:end_test, :]
    model_ols = sm.OLS(y_train_val, sm.add_constant(X_train_val)).fit()
    OLS_score = model_ols.predict(sm.add_constant(X_test)).unstack()
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement.
    df_score = pd.concat([df_score, OLS_score])
df_score.tail()
100%|██████████████████████████████████████████████████████████████████████████████████| 84/84 [05:40<00:00, 4.05s/it]
| ticker | 1332 JT | 1333 JT | 1334 JT | 1605 JT | 1721 JT | 1801 JT | 1802 JT | 1803 JT | 1808 JT | 1812 JT | ... | 9503 JT | 9531 JT | 9532 JT | 9602 JT | 9613 JT | 9681 JT | 9735 JT | 9766 JT | 9983 JT | 9984 JT |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2021-03-15 | 0.002312 | 0.003217 | NaN | -0.000376 | -0.002625 | 0.000733 | 0.004136 | 0.003024 | -0.002064 | -0.001430 | ... | 0.001337 | -0.000499 | 0.000999 | -0.000315 | -0.008496 | NaN | 0.003134 | 0.001663 | 0.003173 | 0.004026 |
| 2021-03-16 | -0.001900 | -0.004763 | NaN | -0.001257 | -0.001328 | -0.003604 | -0.005215 | -0.003844 | -0.000022 | -0.002954 | ... | -0.002775 | -0.007439 | -0.005836 | -0.002501 | -0.003085 | NaN | -0.005647 | -0.013153 | -0.004076 | -0.005018 |
| 2021-03-17 | 0.000414 | 0.002123 | NaN | 0.004094 | -0.002294 | -0.002652 | 0.001127 | -0.000159 | 0.005685 | 0.001094 | ... | 0.000647 | -0.001683 | 0.001577 | 0.004532 | -0.005560 | NaN | -0.000289 | -0.005337 | -0.009999 | 0.006334 |
| 2021-03-18 | -0.007788 | -0.006948 | NaN | -0.009021 | -0.004904 | -0.005547 | -0.007436 | -0.007110 | -0.002246 | -0.008252 | ... | -0.007085 | 0.001898 | -0.003948 | -0.011159 | -0.001865 | NaN | -0.003162 | -0.003755 | -0.005084 | -0.008572 |
| 2021-03-19 | 0.004560 | 0.007410 | NaN | 0.000689 | 0.000198 | 0.000635 | 0.002995 | 0.001258 | 0.003398 | -0.002472 | ... | -0.000200 | -0.001334 | 0.000700 | -0.001269 | -0.000475 | NaN | 0.000537 | -0.002995 | -0.005119 | -0.001387 |
5 rows × 248 columns
# Coefficient table and fit diagnostics for the OLS model from the final walk-forward window.
model_ols.summary()
| Dep. Variable: | y | R-squared: | 0.075 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.073 |
| Method: | Least Squares | F-statistic: | 39.73 |
| Date: | Fri, 25 Mar 2022 | Prob (F-statistic): | 0.00 |
| Time: | 22:37:40 | Log-Likelihood: | 1.6406e+05 |
| No. Observations: | 70146 | AIC: | -3.278e+05 |
| Df Residuals: | 70002 | BIC: | -3.265e+05 |
| Df Model: | 143 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.0002 | 0.000 | 2.005 | 0.045 | 4.73e-06 | 0.000 |
| R1 | 0.0569 | 0.004 | 15.019 | 0.000 | 0.049 | 0.064 |
| R2 | 0.0150 | 0.004 | 3.957 | 0.000 | 0.008 | 0.022 |
| R3 | -0.0394 | 0.004 | -10.379 | 0.000 | -0.047 | -0.032 |
| R4 | 0.0503 | 0.004 | 13.228 | 0.000 | 0.043 | 0.058 |
| R5 | 0.0011 | 0.004 | 0.290 | 0.772 | -0.006 | 0.009 |
| R6 | -0.0055 | 0.004 | -1.449 | 0.147 | -0.013 | 0.002 |
| R7 | -0.0515 | 0.004 | -13.509 | 0.000 | -0.059 | -0.044 |
| R8 | 0.0047 | 0.004 | 1.241 | 0.215 | -0.003 | 0.012 |
| R9 | 0.0249 | 0.004 | 6.518 | 0.000 | 0.017 | 0.032 |
| R10 | 0.0010 | 0.004 | 0.273 | 0.785 | -0.006 | 0.009 |
| R11 | 0.0106 | 0.004 | 2.780 | 0.005 | 0.003 | 0.018 |
| R12 | 0.0238 | 0.004 | 6.209 | 0.000 | 0.016 | 0.031 |
| R13 | 0.0276 | 0.004 | 7.189 | 0.000 | 0.020 | 0.035 |
| R14 | 0.0317 | 0.004 | 8.272 | 0.000 | 0.024 | 0.039 |
| R15 | -0.0230 | 0.004 | -6.002 | 0.000 | -0.031 | -0.015 |
| R16 | -0.0089 | 0.004 | -2.323 | 0.020 | -0.016 | -0.001 |
| R17 | -0.0291 | 0.004 | -7.566 | 0.000 | -0.037 | -0.022 |
| R18 | 0.0019 | 0.004 | 0.503 | 0.615 | -0.006 | 0.009 |
| R19 | 0.0209 | 0.004 | 5.441 | 0.000 | 0.013 | 0.028 |
| R20 | -0.0454 | 0.004 | -11.828 | 0.000 | -0.053 | -0.038 |
| R21 | 0.0592 | 0.004 | 13.731 | 0.000 | 0.051 | 0.068 |
| R22 | 0.0024 | 0.004 | 0.557 | 0.578 | -0.006 | 0.011 |
| R23 | 0.0104 | 0.004 | 2.439 | 0.015 | 0.002 | 0.019 |
| R24 | 0.0049 | 0.004 | 1.147 | 0.251 | -0.003 | 0.013 |
| R25 | 0.0263 | 0.004 | 6.208 | 0.000 | 0.018 | 0.035 |
| R26 | 0.0165 | 0.004 | 3.901 | 0.000 | 0.008 | 0.025 |
| R27 | 0.0336 | 0.004 | 7.906 | 0.000 | 0.025 | 0.042 |
| R28 | -0.0080 | 0.004 | -1.847 | 0.065 | -0.016 | 0.000 |
| R29 | -0.0164 | 0.004 | -3.755 | 0.000 | -0.025 | -0.008 |
| R30 | 0.0372 | 0.004 | 8.495 | 0.000 | 0.029 | 0.046 |
| R31 | -0.0129 | 0.004 | -2.941 | 0.003 | -0.021 | -0.004 |
| R32 | 0.0275 | 0.004 | 6.383 | 0.000 | 0.019 | 0.036 |
| R33 | 0.0135 | 0.004 | 3.118 | 0.002 | 0.005 | 0.022 |
| R34 | 0.0240 | 0.004 | 5.591 | 0.000 | 0.016 | 0.032 |
| R35 | -0.0226 | 0.004 | -5.312 | 0.000 | -0.031 | -0.014 |
| R36 | -0.0384 | 0.004 | -8.966 | 0.000 | -0.047 | -0.030 |
| R37 | 0.0383 | 0.004 | 8.996 | 0.000 | 0.030 | 0.047 |
| R38 | -0.0066 | 0.004 | -1.547 | 0.122 | -0.015 | 0.002 |
| R39 | 0.0107 | 0.004 | 2.497 | 0.013 | 0.002 | 0.019 |
| R40 | 0.0390 | 0.004 | 9.147 | 0.000 | 0.031 | 0.047 |
| R41 | 0.0163 | 0.004 | 3.810 | 0.000 | 0.008 | 0.025 |
| R42 | -0.0074 | 0.004 | -1.706 | 0.088 | -0.016 | 0.001 |
| R43 | 0.0051 | 0.004 | 1.177 | 0.239 | -0.003 | 0.014 |
| R44 | -0.0141 | 0.004 | -3.214 | 0.001 | -0.023 | -0.005 |
| R45 | -0.0236 | 0.004 | -5.411 | 0.000 | -0.032 | -0.015 |
| R46 | 0.0368 | 0.004 | 8.458 | 0.000 | 0.028 | 0.045 |
| R47 | 0.0186 | 0.004 | 4.267 | 0.000 | 0.010 | 0.027 |
| R48 | 0.0104 | 0.004 | 2.387 | 0.017 | 0.002 | 0.019 |
| R49 | 0.0274 | 0.004 | 6.249 | 0.000 | 0.019 | 0.036 |
| R50 | -0.0252 | 0.004 | -5.750 | 0.000 | -0.034 | -0.017 |
| R51 | -0.0048 | 0.004 | -1.091 | 0.275 | -0.013 | 0.004 |
| R52 | -0.0266 | 0.004 | -6.051 | 0.000 | -0.035 | -0.018 |
| R53 | -0.0176 | 0.004 | -4.015 | 0.000 | -0.026 | -0.009 |
| R54 | -0.0359 | 0.004 | -8.230 | 0.000 | -0.044 | -0.027 |
| R55 | 0.0260 | 0.004 | 5.959 | 0.000 | 0.017 | 0.034 |
| R56 | 0.0013 | 0.004 | 0.301 | 0.764 | -0.007 | 0.010 |
| R57 | -0.0096 | 0.004 | -2.168 | 0.030 | -0.018 | -0.001 |
| R58 | -0.0100 | 0.004 | -2.259 | 0.024 | -0.019 | -0.001 |
| R59 | 0.0178 | 0.004 | 4.082 | 0.000 | 0.009 | 0.026 |
| R60 | 0.0340 | 0.004 | 7.865 | 0.000 | 0.025 | 0.042 |
| R61 | 0.0130 | 0.004 | 3.006 | 0.003 | 0.005 | 0.021 |
| R62 | -0.0283 | 0.004 | -6.534 | 0.000 | -0.037 | -0.020 |
| R63 | 0.0120 | 0.004 | 2.759 | 0.006 | 0.003 | 0.021 |
| R64 | 0.0182 | 0.004 | 4.189 | 0.000 | 0.010 | 0.027 |
| R65 | 0.0515 | 0.004 | 11.897 | 0.000 | 0.043 | 0.060 |
| R66 | 0.0165 | 0.004 | 3.799 | 0.000 | 0.008 | 0.025 |
| R67 | -0.0107 | 0.004 | -2.483 | 0.013 | -0.019 | -0.002 |
| R68 | -0.0235 | 0.004 | -5.471 | 0.000 | -0.032 | -0.015 |
| R69 | 0.0318 | 0.004 | 7.438 | 0.000 | 0.023 | 0.040 |
| R70 | 0.0285 | 0.004 | 6.683 | 0.000 | 0.020 | 0.037 |
| R71 | 0.0134 | 0.004 | 3.122 | 0.002 | 0.005 | 0.022 |
| R72 | -0.0096 | 0.004 | -2.211 | 0.027 | -0.018 | -0.001 |
| R73 | 0.0211 | 0.004 | 4.869 | 0.000 | 0.013 | 0.030 |
| R74 | -0.0005 | 0.004 | -0.121 | 0.904 | -0.009 | 0.008 |
| R75 | 0.0395 | 0.004 | 8.932 | 0.000 | 0.031 | 0.048 |
| R76 | -0.0160 | 0.004 | -3.659 | 0.000 | -0.025 | -0.007 |
| R77 | 0.0452 | 0.004 | 10.432 | 0.000 | 0.037 | 0.054 |
| R78 | -0.0003 | 0.004 | -0.072 | 0.942 | -0.009 | 0.008 |
| R79 | 0.0446 | 0.004 | 10.387 | 0.000 | 0.036 | 0.053 |
| R80 | 0.0116 | 0.004 | 2.695 | 0.007 | 0.003 | 0.020 |
| R81 | 0.0014 | 0.004 | 0.327 | 0.744 | -0.007 | 0.010 |
| R82 | -0.0135 | 0.004 | -3.113 | 0.002 | -0.022 | -0.005 |
| R83 | 0.0027 | 0.004 | 0.615 | 0.539 | -0.006 | 0.011 |
| R84 | -0.0122 | 0.004 | -2.778 | 0.005 | -0.021 | -0.004 |
| R85 | 0.0146 | 0.004 | 3.366 | 0.001 | 0.006 | 0.023 |
| R86 | -0.0239 | 0.004 | -5.544 | 0.000 | -0.032 | -0.015 |
| R87 | -0.0453 | 0.004 | -10.437 | 0.000 | -0.054 | -0.037 |
| R88 | 0.0508 | 0.004 | 11.641 | 0.000 | 0.042 | 0.059 |
| R89 | -0.0276 | 0.004 | -6.231 | 0.000 | -0.036 | -0.019 |
| R90 | 0.0072 | 0.004 | 1.615 | 0.106 | -0.002 | 0.016 |
| R91 | -0.0033 | 0.004 | -0.745 | 0.456 | -0.012 | 0.005 |
| R92 | 0.0300 | 0.004 | 6.793 | 0.000 | 0.021 | 0.039 |
| R93 | 0.0650 | 0.004 | 14.749 | 0.000 | 0.056 | 0.074 |
| R94 | 0.0093 | 0.004 | 2.100 | 0.036 | 0.001 | 0.018 |
| R95 | -0.0057 | 0.004 | -1.294 | 0.196 | -0.014 | 0.003 |
| R96 | -0.0295 | 0.004 | -6.614 | 0.000 | -0.038 | -0.021 |
| R97 | -0.0083 | 0.004 | -1.864 | 0.062 | -0.017 | 0.000 |
| R98 | 0.0016 | 0.004 | 0.363 | 0.717 | -0.007 | 0.010 |
| R99 | 0.0043 | 0.004 | 0.973 | 0.331 | -0.004 | 0.013 |
| R100 | 0.0259 | 0.004 | 5.800 | 0.000 | 0.017 | 0.035 |
| R101 | -0.0216 | 0.004 | -4.869 | 0.000 | -0.030 | -0.013 |
| R102 | 0.0216 | 0.004 | 4.851 | 0.000 | 0.013 | 0.030 |
| R103 | 0.0271 | 0.004 | 6.072 | 0.000 | 0.018 | 0.036 |
| R104 | -0.0176 | 0.004 | -3.987 | 0.000 | -0.026 | -0.009 |
| R105 | 0.0224 | 0.004 | 5.063 | 0.000 | 0.014 | 0.031 |
| R106 | -0.0103 | 0.004 | -2.343 | 0.019 | -0.019 | -0.002 |
| R107 | 0.0310 | 0.004 | 7.000 | 0.000 | 0.022 | 0.040 |
| R108 | 0.0138 | 0.004 | 3.083 | 0.002 | 0.005 | 0.023 |
| R109 | -0.0063 | 0.005 | -1.385 | 0.166 | -0.015 | 0.003 |
| R110 | -0.0059 | 0.005 | -1.301 | 0.193 | -0.015 | 0.003 |
| R111 | -0.0295 | 0.004 | -6.568 | 0.000 | -0.038 | -0.021 |
| R112 | 0.0221 | 0.004 | 4.934 | 0.000 | 0.013 | 0.031 |
| R113 | 0.0066 | 0.004 | 1.487 | 0.137 | -0.002 | 0.015 |
| R114 | 0.0315 | 0.004 | 7.109 | 0.000 | 0.023 | 0.040 |
| R115 | 0.0174 | 0.004 | 3.924 | 0.000 | 0.009 | 0.026 |
| R116 | -0.0181 | 0.004 | -4.091 | 0.000 | -0.027 | -0.009 |
| R117 | 0.0108 | 0.004 | 2.426 | 0.015 | 0.002 | 0.019 |
| R118 | 0.0082 | 0.004 | 1.839 | 0.066 | -0.001 | 0.017 |
| R119 | -0.0075 | 0.004 | -1.662 | 0.097 | -0.016 | 0.001 |
| R120 | 0.0127 | 0.005 | 2.783 | 0.005 | 0.004 | 0.022 |
| V1 | -1.62e-10 | 3.63e-11 | -4.460 | 0.000 | -2.33e-10 | -9.08e-11 |
| V2 | 1.114e-11 | 4.1e-11 | 0.272 | 0.786 | -6.92e-11 | 9.15e-11 |
| V3 | 8.094e-11 | 4.15e-11 | 1.950 | 0.051 | -4.06e-13 | 1.62e-10 |
| V4 | 1.118e-10 | 4.19e-11 | 2.671 | 0.008 | 2.98e-11 | 1.94e-10 |
| V5 | 5.18e-11 | 4.25e-11 | 1.220 | 0.223 | -3.14e-11 | 1.35e-10 |
| V6 | 2.772e-11 | 4.27e-11 | 0.649 | 0.516 | -5.59e-11 | 1.11e-10 |
| V7 | 2.534e-10 | 4.27e-11 | 5.930 | 0.000 | 1.7e-10 | 3.37e-10 |
| V8 | -2.834e-10 | 4.27e-11 | -6.635 | 0.000 | -3.67e-10 | -2e-10 |
| V9 | -1.458e-10 | 4.28e-11 | -3.403 | 0.001 | -2.3e-10 | -6.18e-11 |
| V10 | 1.236e-10 | 4.3e-11 | 2.874 | 0.004 | 3.93e-11 | 2.08e-10 |
| V11 | -2.357e-10 | 4.31e-11 | -5.472 | 0.000 | -3.2e-10 | -1.51e-10 |
| V12 | -2.638e-13 | 4.24e-11 | -0.006 | 0.995 | -8.33e-11 | 8.28e-11 |
| V13 | -4.3e-11 | 4.22e-11 | -1.019 | 0.308 | -1.26e-10 | 3.97e-11 |
| V14 | 7.052e-12 | 4.23e-11 | 0.167 | 0.868 | -7.58e-11 | 8.99e-11 |
| V15 | 8.602e-11 | 4.23e-11 | 2.036 | 0.042 | 3.19e-12 | 1.69e-10 |
| V16 | 4.241e-11 | 4.22e-11 | 1.004 | 0.315 | -4.04e-11 | 1.25e-10 |
| V17 | 4.507e-11 | 4.22e-11 | 1.069 | 0.285 | -3.76e-11 | 1.28e-10 |
| V18 | 7.3e-11 | 4.21e-11 | 1.733 | 0.083 | -9.58e-12 | 1.56e-10 |
| V19 | 1.327e-10 | 4.14e-11 | 3.203 | 0.001 | 5.15e-11 | 2.14e-10 |
| V20 | -3.195e-11 | 4.12e-11 | -0.775 | 0.438 | -1.13e-10 | 4.88e-11 |
| V21 | -1.217e-10 | 3.72e-11 | -3.274 | 0.001 | -1.95e-10 | -4.89e-11 |
| Cret252_125 | -0.1037 | 0.057 | -1.806 | 0.071 | -0.216 | 0.009 |
| Cret125_21 | -0.9479 | 0.200 | -4.747 | 0.000 | -1.339 | -0.557 |
| Omnibus: | 8445.364 | Durbin-Watson: | 1.024 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 70001.923 |
| Skew: | 0.288 | Prob(JB): | 0.00 |
| Kurtosis: | 7.860 | Cond. No. | 6.73e+10 |
## OLS score:
# Turn the OLS scores into portfolio weights, then evaluate PnL at several signal lags.
strategy_name = 'OLS'
lag_list = [0, 1, 2, 3, 5, 10, 21]
OLS_weights = compute_weights(df_score, universe_test)
summary, pnls = compute_summary(OLS_weights, ret, lags=lag_list)
display(summary)
plot_title = 'strategy {}: PNL plot with different signal lags'.format(strategy_name)
pnls.dropna(how='all').cumsum().iplot(title=plot_title)
| 0 | 1 | 2 | 3 | 5 | 10 | 21 | |
|---|---|---|---|---|---|---|---|
| sharpe | 0.579851 | -0.071927 | -0.007785 | -1.064029 | -0.018862 | 0.115127 | -0.886951 |
| turnover | 128.460100 | 128.451134 | 128.433827 | 128.436152 | 128.430691 | 128.419811 | 128.380318 |
df_score_lasso = pd.DataFrame(columns=universe.columns)  # empty frame collecting monthly Lasso scores
# Walk-forward schedule. The training-start list originally ran to '2019-03'
# while the two test lists are shorter; zip() silently truncates to the
# shortest input, so the last test months were never scored (the Lasso score
# frame ended in 2021-02 while the OLS one reached 2021-03). End at '2019-01'
# to match the OLS block so all three lists line up.
beg_train_list = pd.date_range('2013-01', '2019-01', freq='M').strftime('%Y-%m')
beg_test_list = pd.date_range('2015-01', '2021-03', freq='M').strftime('%Y-%m')
end_test_list = pd.date_range('2015-02', '2021-04', freq='M').strftime('%Y-%m')
date_list = list(zip(beg_train_list, beg_test_list, end_test_list))
for beg_train, beg_test, end_test in tqdm(date_list, leave=True):
    X_train_val = df_fset[x_list].loc[beg_train:beg_test, :]
    y_train_val = df_fset['y'].loc[beg_train:beg_test, :]
    X_test = df_fset[x_list].loc[beg_test:end_test, :]
    # NOTE(review): features sit on wildly different scales (returns ~1e-2 vs
    # volumes ~1e6) and LassoCV penalizes raw coefficients, so it appears to
    # shrink essentially all of them to zero — the score output is a constant.
    # Standardizing the features before fitting would give the L1 penalty a
    # fair footing; confirm before relying on this strategy.
    lasso = LassoCV(cv=5, random_state=0).fit(X_train_val, y_train_val)
    predicted = lasso.predict(X_test)
    lasso_score = pd.Series(predicted.reshape(-1,), index = X_test.index).unstack()
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement.
    df_score_lasso = pd.concat([df_score_lasso, lasso_score])
df_score_lasso.tail()
100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [06:34<00:00, 5.33s/it]
| ticker | 1332 JT | 1333 JT | 1334 JT | 1605 JT | 1721 JT | 1801 JT | 1802 JT | 1803 JT | 1808 JT | 1812 JT | ... | 9503 JT | 9531 JT | 9532 JT | 9602 JT | 9613 JT | 9681 JT | 9735 JT | 9766 JT | 9983 JT | 9984 JT |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2021-02-19 | 0.000012 | 0.000012 | NaN | 0.000012 | 0.000012 | 0.000012 | 0.000012 | 0.000012 | 0.000012 | 0.000012 | ... | 0.000012 | 0.000012 | 0.000012 | 0.000012 | 0.000012 | NaN | 0.000012 | 0.000012 | 0.000012 | 0.000012 |
| 2021-02-22 | 0.000012 | 0.000012 | NaN | 0.000012 | 0.000012 | 0.000012 | 0.000012 | 0.000012 | 0.000012 | 0.000012 | ... | 0.000012 | 0.000012 | 0.000012 | 0.000012 | 0.000012 | NaN | 0.000012 | 0.000012 | 0.000012 | 0.000012 |
| 2021-02-24 | 0.000012 | 0.000012 | NaN | 0.000012 | 0.000012 | 0.000012 | 0.000012 | 0.000012 | 0.000012 | 0.000012 | ... | 0.000012 | 0.000012 | 0.000012 | 0.000012 | 0.000012 | NaN | 0.000012 | 0.000012 | 0.000012 | 0.000012 |
| 2021-02-25 | 0.000012 | 0.000012 | NaN | 0.000012 | 0.000012 | 0.000012 | 0.000012 | 0.000012 | 0.000012 | 0.000012 | ... | 0.000012 | 0.000012 | 0.000012 | 0.000012 | 0.000012 | NaN | 0.000012 | 0.000012 | 0.000012 | 0.000012 |
| 2021-02-26 | 0.000012 | 0.000012 | NaN | 0.000012 | 0.000012 | 0.000012 | 0.000012 | 0.000012 | 0.000012 | 0.000012 | ... | 0.000012 | 0.000012 | 0.000012 | 0.000012 | 0.000012 | NaN | 0.000012 | 0.000012 | 0.000012 | 0.000012 |
5 rows × 248 columns
## Lasso score:
# Convert the Lasso scores into weights and summarize PnL across signal lags.
strategy_name = 'Lasso'
lag_list = [0, 1, 2, 3, 5, 10, 21]
lasso_weights = compute_weights(df_score_lasso, universe_test)
summary, pnls = compute_summary(lasso_weights, ret, lags=lag_list)
display(summary)
plot_title = 'strategy {}: PNL plot with different signal lags'.format(strategy_name)
pnls.dropna(how='all').cumsum().iplot(title=plot_title)
| 0 | 1 | 2 | 3 | 5 | 10 | 21 | |
|---|---|---|---|---|---|---|---|
| sharpe | 0.005269 | 0.181887 | 0.272670 | 0.301129 | 0.349436 | 0.075860 | 0.248966 |
| turnover | 29.119584 | 29.119584 | 29.119584 | 29.119584 | 29.119584 | 29.119584 | 29.212619 |
# basic settings for NN
n_epochs = 5
# Simple MLP regressor: two hidden ReLU layers with dropout, single linear output.
network = models.Sequential()
# Derive the input width from the feature list instead of hard-coding 143, so
# the architecture stays in sync if features are added or removed upstream.
network.add(layers.Dense(512, activation='relu', input_shape=(len(x_list),)))
network.add(layers.Dense(256, activation='relu'))
network.add(layers.Dropout(.5))  # regularization between the hidden layers
network.add(layers.Dense(1))     # single regression output (return score)
network.compile(optimizer='adam', loss='mean_absolute_error')
df_score_nn = pd.DataFrame(columns=universe.columns)  # empty frame collecting yearly NN scores
# Yearly walk-forward: fit on [beg_train, beg_test], score [beg_test, end_test].
beg_train_list = pd.date_range('2013', '2019', freq='Y').strftime('%Y')
beg_test_list = pd.date_range('2015', '2021', freq='Y').strftime('%Y')
end_test_list = pd.date_range('2016', '2022', freq='Y').strftime('%Y')
date_list = list(zip(beg_train_list, beg_test_list, end_test_list))
for beg_train, beg_test, end_test in tqdm(date_list, leave=True):
    X_train_val = df_fset[x_list].loc[beg_train:beg_test, :]
    y_train_val = df_fset['y'].loc[beg_train:beg_test, :]
    # Hold out 25% of the window for validation monitoring during fit.
    X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.25, random_state=4)
    X_test = df_fset[x_list].loc[beg_test:end_test, :]
    # NOTE(review): `network` is built once outside this loop, so each window
    # continues training the weights left by the previous window (a warm
    # start) rather than fitting from scratch — confirm this is intentional.
    network.fit(X_train, y_train.to_numpy(), validation_data = (X_val, y_val.to_numpy()),
                epochs=n_epochs, batch_size=128)
    predicted = network.predict(X_test)
    nn_score = pd.Series(predicted.reshape(-1,), index = X_test.index).unstack()
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement.
    df_score_nn = pd.concat([df_score_nn, nn_score])
df_score_nn.tail()
0%| | 0/6 [00:00<?, ?it/s]
Epoch 1/5 394/394 [==============================] - 2s 5ms/step - loss: 0.0133 - val_loss: 0.0134 Epoch 2/5 394/394 [==============================] - 2s 5ms/step - loss: 0.0133 - val_loss: 0.0134 Epoch 3/5 394/394 [==============================] - 2s 6ms/step - loss: 0.0133 - val_loss: 0.0133 Epoch 4/5 394/394 [==============================] - 2s 6ms/step - loss: 0.0133 - val_loss: 0.0133 Epoch 5/5 394/394 [==============================] - 2s 6ms/step - loss: 0.0133 - val_loss: 0.0133
17%|██████████████ | 1/6 [00:13<01:09, 13.85s/it]
Epoch 1/5 641/641 [==============================] - 3s 5ms/step - loss: 0.0139 - val_loss: 0.0139 Epoch 2/5 641/641 [==============================] - 3s 5ms/step - loss: 0.0139 - val_loss: 0.0139 Epoch 3/5 641/641 [==============================] - 3s 5ms/step - loss: 0.0139 - val_loss: 0.0140 Epoch 4/5 641/641 [==============================] - 4s 6ms/step - loss: 0.0139 - val_loss: 0.0139 Epoch 5/5 641/641 [==============================] - 4s 7ms/step - loss: 0.0139 - val_loss: 0.0139
33%|████████████████████████████ | 2/6 [00:35<01:12, 18.21s/it]
Epoch 1/5 640/640 [==============================] - 3s 5ms/step - loss: 0.0160 - val_loss: 0.0162 Epoch 2/5 640/640 [==============================] - 3s 4ms/step - loss: 0.0160 - val_loss: 0.0162 Epoch 3/5 640/640 [==============================] - 3s 4ms/step - loss: 0.0160 - val_loss: 0.0162 Epoch 4/5 640/640 [==============================] - 3s 4ms/step - loss: 0.0160 - val_loss: 0.0162 Epoch 5/5 640/640 [==============================] - 3s 5ms/step - loss: 0.0160 - val_loss: 0.0162
50%|██████████████████████████████████████████ | 3/6 [00:52<00:53, 17.71s/it]
Epoch 1/5 641/641 [==============================] - 3s 5ms/step - loss: 0.0140 - val_loss: 0.0140 Epoch 2/5 641/641 [==============================] - 3s 5ms/step - loss: 0.0141 - val_loss: 0.0140 Epoch 3/5 641/641 [==============================] - 3s 5ms/step - loss: 0.0141 - val_loss: 0.0140 Epoch 4/5 641/641 [==============================] - 3s 5ms/step - loss: 0.0140 - val_loss: 0.0140 Epoch 5/5 641/641 [==============================] - 3s 5ms/step - loss: 0.0141 - val_loss: 0.0140
67%|████████████████████████████████████████████████████████ | 4/6 [01:10<00:36, 18.03s/it]
Epoch 1/5 642/642 [==============================] - 4s 6ms/step - loss: 0.0120 - val_loss: 0.0119 Epoch 2/5 642/642 [==============================] - 4s 6ms/step - loss: 0.0120 - val_loss: 0.0119 Epoch 3/5 642/642 [==============================] - 3s 5ms/step - loss: 0.0120 - val_loss: 0.0118 Epoch 4/5 642/642 [==============================] - 4s 6ms/step - loss: 0.0120 - val_loss: 0.0119 Epoch 5/5 642/642 [==============================] - 4s 6ms/step - loss: 0.0120 - val_loss: 0.0118
83%|██████████████████████████████████████████████████████████████████████ | 5/6 [01:32<00:19, 19.40s/it]
Epoch 1/5 634/634 [==============================] - 4s 6ms/step - loss: 0.0128 - val_loss: 0.0127 Epoch 2/5 634/634 [==============================] - 4s 6ms/step - loss: 0.0128 - val_loss: 0.0127 Epoch 3/5 634/634 [==============================] - 4s 6ms/step - loss: 0.0128 - val_loss: 0.0128 Epoch 4/5 634/634 [==============================] - 4s 7ms/step - loss: 0.0128 - val_loss: 0.0127 Epoch 5/5 634/634 [==============================] - 3s 5ms/step - loss: 0.0128 - val_loss: 0.0127
100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [01:55<00:00, 19.19s/it]
| ticker | 1332 JT | 1333 JT | 1334 JT | 1605 JT | 1721 JT | 1801 JT | 1802 JT | 1803 JT | 1808 JT | 1812 JT | ... | 9503 JT | 9531 JT | 9532 JT | 9602 JT | 9613 JT | 9681 JT | 9735 JT | 9766 JT | 9983 JT | 9984 JT |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2020-12-24 | -0.000653 | -0.000653 | NaN | -0.000653 | -0.000653 | -0.000653 | -0.000653 | -0.000653 | -0.000653 | -0.000653 | ... | -0.000653 | -0.000653 | -0.000653 | -0.000653 | -0.000653 | NaN | -0.000653 | -0.000653 | -0.000653 | -0.000653 |
| 2020-12-25 | -0.000653 | -0.000653 | NaN | -0.000653 | -0.000653 | -0.000653 | -0.000653 | -0.000653 | -0.000653 | -0.000653 | ... | -0.000653 | -0.000653 | -0.000653 | -0.000653 | -0.000653 | NaN | -0.000653 | -0.000653 | -0.000653 | -0.000653 |
| 2020-12-28 | -0.000653 | -0.000653 | NaN | -0.000653 | -0.000653 | -0.000653 | -0.000653 | -0.000653 | -0.000653 | -0.000653 | ... | -0.000653 | -0.000653 | -0.000653 | -0.000653 | -0.000653 | NaN | -0.000653 | -0.000653 | -0.000653 | -0.000653 |
| 2020-12-29 | -0.000653 | -0.000653 | NaN | -0.000653 | -0.000653 | -0.000653 | -0.000653 | -0.000653 | -0.000653 | -0.000653 | ... | -0.000653 | -0.000653 | -0.000653 | -0.000653 | -0.000653 | NaN | -0.000653 | -0.000653 | -0.000653 | -0.000653 |
| 2020-12-30 | -0.000653 | -0.000653 | NaN | -0.000653 | -0.000653 | -0.000653 | -0.000653 | -0.000653 | -0.000653 | -0.000653 | ... | -0.000653 | -0.000653 | -0.000653 | -0.000653 | -0.000653 | NaN | -0.000653 | -0.000653 | -0.000653 | -0.000653 |
5 rows × 248 columns
## NN score: turn the neural-network scores into weights and evaluate the PNL
strategy_name = 'NeuralNetwork'
nn_weights = compute_weights(df_score_nn, universe_test)
lag_list = [0, 1, 2, 3, 5, 10, 21]
summary, pnls = compute_summary(nn_weights, ret, lags=lag_list)
display(summary)
cum_pnl = pnls.dropna(how='all').cumsum()
cum_pnl.iplot(title='strategy {}: PNL plot with different signal lags'.format(strategy_name))
| 0 | 1 | 2 | 3 | 5 | 10 | 21 | |
|---|---|---|---|---|---|---|---|
| sharpe | 0.108667 | 0.135279 | 0.147450 | 0.152669 | 0.170700 | 0.154035 | 0.157906 |
| turnover | 0.206801 | 0.206801 | 0.206801 | 0.206801 | 0.206801 | 0.206801 | 0.206801 |
n_epochs = 5
# two stacked LSTM layers feeding a single-unit linear head; MAE loss with Adam
network = models.Sequential([
    layers.LSTM(10, return_sequences=True, input_shape=(119, 1)),
    layers.LSTM(10),
    layers.Dense(1),
])
network.compile(optimizer='adam', loss='mean_absolute_error')
network.fit(
    X_train, y_train.to_numpy(),
    validation_data=(X_val, y_val.to_numpy()),
    epochs=n_epochs,
    batch_size=128,
)
Epoch 1/5 720/720 [==============================] - 103s 139ms/step - loss: 0.0141 - val_loss: 0.0141 Epoch 2/5 720/720 [==============================] - 98s 136ms/step - loss: 0.0141 - val_loss: 0.0142 Epoch 3/5 720/720 [==============================] - 91s 127ms/step - loss: 0.0141 - val_loss: 0.0141 Epoch 4/5 720/720 [==============================] - 65s 90ms/step - loss: 0.0140 - val_loss: 0.0141 Epoch 5/5 720/720 [==============================] - 64s 88ms/step - loss: 0.0141 - val_loss: 0.0142
<keras.callbacks.History at 0x1aed8779430>
# score the test set and pivot the flat predictions back to a date x ticker frame
predicted = network.predict(X_test)
LSTM_score = pd.Series(predicted.ravel(), index=y_test.index).unstack()
## LSTM score: weights and PNL evaluation for the LSTM strategy
strategy_name = 'LSTM'
LSTM_weights = compute_weights(LSTM_score, universe_test)
summary, pnls = compute_summary(LSTM_weights, ret, lags=[0, 1, 2, 3, 5, 10, 21])
display(summary)
title_text = 'strategy {}: PNL plot with different signal lags'.format(strategy_name)
pnls.dropna(how='all').cumsum().iplot(title=title_text)
| 0 | 1 | 2 | 3 | 5 | 10 | 21 | |
|---|---|---|---|---|---|---|---|
| sharpe | -0.683343 | -0.720039 | -0.569934 | 0.006965 | 0.551530 | 0.670222 | -0.109505 |
| turnover | 34.505832 | 34.503007 | 34.509394 | 34.516899 | 34.525645 | 34.500248 | 34.531180 |
## walk-forward LGBM: expanding yearly folds, scoring one out-of-sample window per fold
score_pieces = [pd.DataFrame(columns=universe.columns)]  # seed frame fixes the column order
beg_train_list = pd.date_range('2013', '2019', freq='Y').strftime('%Y')
beg_test_list = pd.date_range('2015', '2021', freq='Y').strftime('%Y')
end_test_list = pd.date_range('2016', '2022', freq='Y').strftime('%Y')
date_list = list(zip(beg_train_list, beg_test_list, end_test_list))
for beg_train, beg_test, end_test in tqdm(date_list, leave=True):
    # NOTE(review): partial-string .loc slicing is end-inclusive, so the train window
    # [beg_train:beg_test] and the test window [beg_test:end_test] both contain the full
    # beg_test year — confirm this overlap (potential look-ahead) is intentional.
    X_train_val = df_fset[x_list].loc[beg_train:beg_test, :]
    y_train_val = df_fset['y'].loc[beg_train:beg_test, :]
    X_test = df_fset[x_list].loc[beg_test:end_test, :]
    model = LGBMRegressor(max_depth = 10, n_estimators = 500, subsample = 0.7)
    model.fit(X_train_val, y_train_val, eval_metric = 'rmse')
    predicted = model.predict(X_test)
    # pivot the flat predictions back to a date x ticker frame for this fold
    LGBM_score = pd.Series(predicted.reshape(-1,), index = X_test.index).unstack()
    score_pieces.append(LGBM_score)
# DataFrame.append was removed in pandas 2.0; a single pd.concat over the collected
# fold frames produces the same result as the old per-iteration append.
df_score_LGBM = pd.concat(score_pieces)
df_score_LGBM.tail()
100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [02:10<00:00, 21.78s/it]
| ticker | 1332 JT | 1333 JT | 1334 JT | 1605 JT | 1721 JT | 1801 JT | 1802 JT | 1803 JT | 1808 JT | 1812 JT | ... | 9503 JT | 9531 JT | 9532 JT | 9602 JT | 9613 JT | 9681 JT | 9735 JT | 9766 JT | 9983 JT | 9984 JT |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2020-12-24 | -0.002729 | -0.001188 | NaN | 0.004564 | -0.000282 | -0.008323 | -0.005142 | -0.004552 | 0.000091 | 0.001318 | ... | 0.001861 | -0.011475 | -0.001977 | -0.002410 | -0.007330 | NaN | -0.005140 | -0.001013 | 0.000967 | -0.005354 |
| 2020-12-25 | 0.005927 | 0.001548 | NaN | 0.007956 | 0.003815 | -0.000047 | -0.002861 | -0.002920 | 0.003436 | -0.002125 | ... | 0.003704 | 0.002314 | -0.001454 | -0.005011 | 0.000432 | NaN | -0.006503 | 0.002486 | 0.001248 | -0.001518 |
| 2020-12-28 | 0.003506 | -0.001922 | NaN | 0.003467 | -0.000939 | 0.003951 | -0.000136 | -0.002224 | 0.006376 | 0.003343 | ... | 0.000149 | -0.000348 | -0.001732 | 0.000011 | -0.002021 | NaN | 0.003935 | -0.004756 | -0.001328 | 0.000467 |
| 2020-12-29 | 0.004839 | 0.002305 | NaN | 0.010533 | 0.000897 | 0.001370 | 0.002339 | -0.001614 | -0.001041 | 0.003821 | ... | 0.003507 | 0.003271 | -0.001189 | 0.002016 | 0.000816 | NaN | -0.002342 | 0.005039 | -0.000772 | 0.000869 |
| 2020-12-30 | -0.004356 | -0.002588 | NaN | -0.003307 | -0.003060 | -0.003068 | 0.000287 | -0.000452 | -0.001191 | 0.001519 | ... | -0.001460 | 0.006928 | 0.012865 | -0.002065 | -0.003888 | NaN | 0.005850 | -0.008663 | 0.001287 | 0.003763 |
5 rows × 248 columns
## LGBM score: turn the boosted-tree scores into weights and evaluate the PNL
strategy_name = 'LGBM'
LGBM_weights = compute_weights(df_score_LGBM, universe_test)
lag_list = [0, 1, 2, 3, 5, 10, 21]
summary, pnls = compute_summary(LGBM_weights, ret, lags=lag_list)
display(summary)
cum_pnl = pnls.dropna(how='all').cumsum()
cum_pnl.iplot(title='strategy {}: PNL plot with different signal lags'.format(strategy_name))
| 0 | 1 | 2 | 3 | 5 | 10 | 21 | |
|---|---|---|---|---|---|---|---|
| sharpe | 0.917374 | 0.505233 | -0.066385 | -0.607471 | -0.448581 | -0.163732 | -0.102853 |
| turnover | 128.778105 | 128.778105 | 128.778105 | 128.778105 | 128.778105 | 128.778105 | 128.778105 |
## an invert momentum strategy (dollar-neutral):
strategy_name = 'negated momentum'
# trailing mean return over 252-21 trading days (~1 year less ~1 month),
# lagged by 21 days and negated to fade momentum
trailing_mean = ret.rolling(window=252 - 21, min_periods=100).mean()
momentum_score = -trailing_mean.shift(21)
momentum_weights = compute_weights(momentum_score, universe)
summary, pnls = compute_summary(momentum_weights, ret, lags=[0, 1, 2, 3, 5, 10, 21])
display(summary)
pnls.dropna(how='all').cumsum().iplot(title='strategy {}: PNL plot with different signal lags'.format(strategy_name))
| 0 | 1 | 2 | 3 | 5 | 10 | 21 | |
|---|---|---|---|---|---|---|---|
| sharpe | 0.432898 | 0.458695 | 0.445947 | 0.440282 | 0.424388 | 0.441555 | 0.440533 |
| turnover | 8.514653 | 8.512746 | 8.512596 | 8.512349 | 8.511250 | 8.509332 | 8.512023 |
## a beta-neutral negated momentum strategy :
strategy_name = 'negated momentum_beta_neutral'
momentum_score = -ret.rolling(window=252 - 21, min_periods=100).mean().shift(21)
momentum_weights = compute_weights(momentum_score, universe)
# simplified beta neutral (no longer dollar neutral):
# equal-weight benchmark across in-universe names, NaN elsewhere
names_per_day = universe.sum(axis=1).replace(0., np.nan)
benchmark_weights = universe.astype(float).divide(names_per_day, axis=0).mask(~universe)
# hedge out the portfolio's aggregate beta exposure with the benchmark
portfolio_beta = (momentum_weights * beta).sum(axis=1, min_count=1)
momentum_weights = momentum_weights - benchmark_weights.multiply(portfolio_beta, axis=0)
summary, pnls = compute_summary(momentum_weights, ret, lags=[0, 1, 2, 3, 5, 10, 21])
display(summary)
pnls.dropna(how='all').cumsum().iplot(title='strategy {}: PNL plot with different signal lags'.format(strategy_name))
| 0 | 1 | 2 | 3 | 5 | 10 | 21 | |
|---|---|---|---|---|---|---|---|
| sharpe | 0.309715 | 0.345812 | 0.323874 | 0.332673 | 0.308685 | 0.314419 | 0.261722 |
| turnover | 8.670816 | 8.668588 | 8.668449 | 8.668272 | 8.667262 | 8.665426 | 8.669002 |
## a simple mean-reversion strategy (beta-neutralized), swept over several halflives:
strategy_name = 'mean_reversion'
# trailing 1-year volatility used to normalise the score; it does not depend on the
# halflife, so compute it once outside the loop instead of on every iteration
vol = ret.rolling(window = 252, min_periods = 10).std()
for halflife in [2, 3, 10, 21, 63]:
    # fade the recent exponentially-weighted return, scaled by trailing volatility
    mr_score = -ret.ewm(halflife = halflife).mean() / vol
    mr_weights = compute_weights(mr_score, universe)
    # beta neutral (relies on benchmark_weights from the previous cell)
    mr_weights -= benchmark_weights.multiply((mr_weights * beta).sum(axis=1, min_count=1), axis=0)
    summary, pnls = compute_summary(mr_weights, ret, lags=[0, 1, 2, 3, 5, 10, 21])
    display(summary)
    pnls.dropna(how='all').cumsum().iplot(title='strategy {}-{}: PNL plot with different lags'.format(
        strategy_name, halflife))
| 0 | 1 | 2 | 3 | 5 | 10 | 21 | |
|---|---|---|---|---|---|---|---|
| sharpe | 0.486191 | 0.689680 | 0.358689 | 0.087840 | 0.305915 | -0.144802 | 0.733266 |
| turnover | 67.143878 | 67.142692 | 67.137188 | 67.136699 | 67.129356 | 67.132804 | 67.119582 |
| 0 | 1 | 2 | 3 | 5 | 10 | 21 | |
|---|---|---|---|---|---|---|---|
| sharpe | 0.519963 | 0.664788 | 0.358947 | 0.136270 | 0.315408 | -0.024181 | 0.715468 |
| turnover | 55.790648 | 55.790144 | 55.785468 | 55.785079 | 55.782824 | 55.791035 | 55.792196 |
| 0 | 1 | 2 | 3 | 5 | 10 | 21 | |
|---|---|---|---|---|---|---|---|
| sharpe | 0.565444 | 0.673984 | 0.442900 | 0.320421 | 0.426235 | 0.292181 | 0.405324 |
| turnover | 31.285411 | 31.286221 | 31.285398 | 31.287589 | 31.294315 | 31.304556 | 31.311598 |
| 0 | 1 | 2 | 3 | 5 | 10 | 21 | |
|---|---|---|---|---|---|---|---|
| sharpe | 0.464761 | 0.537188 | 0.358141 | 0.275799 | 0.334010 | 0.296552 | 0.263367 |
| turnover | 21.727743 | 21.727997 | 21.727784 | 21.729572 | 21.733584 | 21.736634 | 21.726422 |
| 0 | 1 | 2 | 3 | 5 | 10 | 21 | |
|---|---|---|---|---|---|---|---|
| sharpe | 0.274072 | 0.321773 | 0.226614 | 0.198972 | 0.216398 | 0.177175 | 0.112696 |
| turnover | 12.879340 | 12.876441 | 12.874896 | 12.874120 | 12.872355 | 12.860706 | 12.842823 |